library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ──────────────────────────────────────────────────── tidyverse 1.3.1 ──
✓ ggplot2 3.3.5 ✓ purrr 0.3.4
✓ tibble 3.1.6 ✓ dplyr 1.0.7
✓ tidyr 1.1.4 ✓ stringr 1.4.0
✓ readr 2.1.0 ✓ forcats 0.5.1
── Conflicts ─────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
# Load the books data set from the CSV file in the working directory.
books <- read_csv(file = "books.csv")
Rows: 11123 Columns: 13
── Column specification ─────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (7): title, authors, isbn, isbn13, language_code, publication_date, publisher
dbl (6): rowid, bookID, average_rating, num_pages, ratings_count, text_reviews_count
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# List the column names of the data set.
colnames(books)
[1] "rowid" "bookID" "title" "authors"
[5] "average_rating" "isbn" "isbn13" "language_code"
[9] "num_pages" "ratings_count" "text_reviews_count" "publication_date"
[13] "publisher"
# Number of rows and columns, in that order.
c(nrow(books), ncol(books))
[1] 11123 13
library(CodeClanData)
Attaching package: ‘CodeClanData’
The following object is masked from ‘package:dplyr’:
starwars
The following object is masked from ‘package:tidyr’:
population
# Compact overview: one line per column with its type and first values.
books %>%
  glimpse()
Rows: 11,123
Columns: 13
$ rowid <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, …
$ bookID <dbl> 1, 2, 4, 5, 8, 9, 10, 12, 13, 14, 16, 18, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 36, 37, 45, 50, 51, 53, 54, 55, 5…
$ title <chr> "Harry Potter and the Half-Blood Prince (Harry Potter #6)", "Harry Potter and the Order of the Phoenix (Harry Potter #5)", "H…
$ authors <chr> "J.K. Rowling/Mary GrandPré", "J.K. Rowling/Mary GrandPré", "J.K. Rowling", "J.K. Rowling/Mary GrandPré", "J.K. Rowling/Mary Gr…
$ average_rating <dbl> 4.57, 4.49, 4.42, 4.56, 4.78, 3.74, 4.73, 4.38, 4.38, 4.22, 4.22, 4.38, 4.21, 3.44, 3.87, 4.07, 3.90, 3.83, 3.86, 3.91, 3.93, 4…
$ isbn <chr> "0439785960", "0439358078", "0439554896", "043965548X", "0439682584", "0976540606", "0439827604", "0517226952", "0345453743", "…
$ isbn13 <chr> "9780439785969", "9780439358071", "9780439554893", "9780439655484", "9780439682589", "9780976540601", "9780439827607", "9780517…
$ language_code <chr> "eng", "eng", "eng", "eng", "eng", "en-US", "eng", "eng", "eng", "eng", "eng", "eng", "eng", "eng", "eng", "eng", "eng", "eng",…
$ num_pages <dbl> 652, 870, 352, 435, 2690, 152, 3342, 815, 815, 215, 6, 815, 544, 55, 256, 335, 304, 299, 254, 324, 270, 1728, 1184, 398, 1216, …
$ ratings_count <dbl> 2095690, 2153167, 6333, 2339585, 41428, 19, 28242, 3628, 249558, 4930, 1266, 2877, 248558, 7270, 2088, 72451, 49240, 45712, 487…
$ text_reviews_count <dbl> 27591, 29221, 244, 36325, 164, 1, 808, 254, 4080, 460, 253, 195, 9396, 499, 131, 4245, 2211, 2257, 2238, 3301, 2085, 1550, 91, …
$ publication_date <chr> "9/16/2006", "9/1/2004", "11/1/2003", "5/1/2004", "9/13/2004", "4/26/2005", "9/12/2005", "11/1/2005", "4/30/2002", "8/3/2004", …
$ publisher <chr> "Scholastic Inc.", "Scholastic Inc.", "Scholastic", "Scholastic Inc.", "Scholastic", "Nimble Books", "Scholastic", "Gramercy Bo…
Now I would like to check whether there are any missing values.
# Total number of NA cells across the whole table.
books %>%
  is.na() %>%
  sum()
[1] 0
Good: there are no missing values. Next I will try to find answers to some questions.
# Books with more than 15,000 ratings.
books %>%
  filter(ratings_count > 15000)

# Books whose average rating is exactly zero.
books %>%
  filter(average_rating == 0)

# All books in ascending title order.
books %>%
  arrange(title)

# Books sorted by author, then by title, both in descending order.
books %>%
  arrange(desc(authors), desc(title))
# How many different language codes appear in the data.
books %>%
  pull(language_code) %>%
  n_distinct()
[1] 27
# How many different values the authors column takes.
books %>%
  pull(authors) %>%
  n_distinct()
[1] 6639
Now I would like to see the top and bottom books by ratings and reviews.
# Row(s) with the largest ratings_count; ties are all returned.
books %>%
  slice_max(order_by = ratings_count, n = 1)
```{r}
Error: attempt to use zero-length variable name
Here I look for the book with the maximum number of pages.
# Row(s) with the largest num_pages; ties are all returned.
books %>%
  slice_max(order_by = num_pages, n = 1)
Now I am trying to see which writer has the most books.
# Number of books per value of the authors column, largest count first.
# An authors value can hold several names joined by "/", so each distinct
# combination of co-authors is counted as its own entry.
books %>%
  count(authors, name = "n_books", sort = TRUE)
NA
Let's see the average rating per publisher, for books with more than 4000 ratings.
# Intermediate step: keep books with more than 4000 ratings and group them
# by publisher. Nothing is summarised yet; this prints the grouped tibble.
books %>%
  filter(ratings_count > 4000) %>%
  group_by(publisher)
NA
# Mean of average_rating per publisher, restricted to books with more than
# 4000 ratings, listed from the highest mean to the lowest.
books %>%
  filter(ratings_count > 4000) %>%
  group_by(publisher) %>%
  summarise(
    mean_rating = mean(average_rating),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_rating))